library(cytomapper)
library(dplyr)
library(ggplot2)
library(simpleSeg)
library(FuseSOM)
library(ggpubr)
library(scater)
library(spicyR)
#library(ClassifyR)
library(ClassifyR, lib.loc = "~/localPackages/")
library(scFeatures)
library(lisaClust)
theme_set(theme_classic())
#withr::with_libpaths(new = "~/localPackages/", devtools::install_github("SydneyBioX/ClassifyR"))
nCores <- 20
BPPARAM <- simpleSeg:::generateBPParam(nCores)
Our images are stored in the images folder within the
Data folder. Here we use readImages() from the
EBImage package to read these into R. If memory is a
restricting factor, and the files are in a slightly different format,
you could use loadImages() from the cytomapper
package to load all of the tiff images into a CytoImageList
object, which can store the images as h5 on-disk.
pathToImages <- "Data/images"
# Get directories of images
imageDirs <- dir(pathToImages, full.names = TRUE)
names(imageDirs) <- dir(pathToImages, full.names = FALSE)
# Get files in each directory
files <- sapply(imageDirs, list.files, pattern = "tif", full.names = TRUE, simplify = FALSE)
# Read files with readImage from EBImage
images <- lapply(files, EBImage::readImage, as.is = TRUE)
We will make use of the on_disk option to convert our
images to a CytoImageList with the images not held in
memory.
# Store images in a CytoImageList with images on_disk as h5 files to save memory.
dir.create("Data/h5Files")
images <- cytomapper::CytoImageList(images,
on_disk = TRUE,
h5FilesPath = "Data/h5Files",
BPPARAM = BPPARAM)
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 17218109 919.6 24979905 1334.1 24979905 1334.1
## Vcells 81827702 624.3 3057820234 23329.4 3822275292 29161.7
# images <- cytomapper::loadImages("Data/h5Files/", pattern = "h5", h5FilesPath = "Data/h5Files/",on_disk = TRUE)
# mcols(images) <- S4Vectors::DataFrame(imageID = names(images))
clinical <- read.csv("Data/1-s2.0-S0092867421014860-mmc1.csv")
clinical <- clinical |>
mutate(imageID = paste0("Point", PointNumber, "_pt", Patient_ID, "_", TMAD_Patient))
clinical$imageID[grep("normal", clinical$Tissue_Type)] <- paste0(clinical$imageID[grep("normal", clinical$Tissue_Type)], "_Normal")
clinicalVariables <- c("imageID", "Patient_ID","Status", "Age", "SUBTYPE", "PAM50", "Treatment", "DCIS_grade", "Necrosis")
rownames(clinical) <- clinical$imageID
mcols(images) <- clinical[names(images), clinicalVariables]
masks <- simpleSeg(images,
nucleus = c("PCA", "HH3"),
cellBody = "dilate",
transforms = "sqrt",
sizeSelection = 40,
discSize = 2,
cores = nCores)
EBImage::display(colorLabels(masks[[1]]))
cytomapper::plotPixels(image = images[1:3],
mask = masks[1:3],
img_id = "imageID",
colour_by = c("PanKRT", "GLUT1", "HH3", "CD3", "CD20"),
display = "single",
colour = list(HH3 = c("black","blue"),
CD3 = c("black","purple"),
CD20 = c("black","green"),
GLUT1 = c("black", "red"),
PanKRT = c("black", "yellow")),
bcg = list(HH3 = c(0, 1, 1.5),
CD3 = c(0, 1, 1.5),
CD20 = c(0, 1, 1.5),
GLUT1 = c(0, 1, 1.5),
PanKRT = c(0, 1, 1.5)),
legend = NULL)
cells <- cytomapper::measureObjects(masks,
images,
img_id = "imageID",
BPPARAM = BPPARAM)
df <- as.data.frame(cbind(colData(cells), t(assay(cells, "counts"))))
ggplot(df, aes(x = PanKRT, colour = imageID)) +
geom_density() +
theme(legend.position = "none")
cells <- normalizeCells(cells,
transformation = "sqrt",
method = c("quant99", "minMax"),
assayIn = "counts",
cores = nCores)
df <- as.data.frame(cbind(colData(cells), t(assay(cells, "norm"))))
ggplot(df, aes(x = PanKRT, colour = imageID)) +
geom_density() +
theme(legend.position = "none")
#The markers used in the original publication to gate cell types
useMarkers <- c("PanKRT", "ECAD", "CK7", "VIM", "FAP", "CD31", "CK5", "SMA",
"CD45", "CD4", "CD3", "CD8", "CD20", "CD68", "CD14", "CD11c",
"HLADRDPDQ", "MPO", "Tryptase")
set.seed(51773)
cells <- runFuseSOM(cells,
markers = useMarkers,
assay = 'norm',
numClusters = 20)
# As I've already run runFuseSOM I don't need to run generateSOM()
cells <- estimateNumCluster(cells, kseq = 2:30)
##
========
=================
==========================
==================================
==========================================
===================================================
============================================================
====================================================================
============================================================================
=====================================================================================
optiPlot(cells, method = "gap")
scater::plotGroupedHeatmap(cells,
features = useMarkers,
group = "clusters",
exprs_values = "norm",
center = TRUE,
scale = TRUE,
zlim = c(-3,3),
cluster_rows = FALSE)
colData(cells)$clusters |>
table() |>
sort()
##
## cluster_2 cluster_5 cluster_16 cluster_10 cluster_7 cluster_18 cluster_11
## 351 361 378 418 570 666 702
## cluster_6 cluster_13 cluster_3 cluster_14 cluster_4 cluster_20 cluster_12
## 869 971 1090 1405 1484 1626 1767
## cluster_15 cluster_8 cluster_19 cluster_17 cluster_9 cluster_1
## 2148 3396 3612 3625 5418 31703
cellsToUse <- cells$Status%in%c("nonprogressor", "progressor")
testProp <- colTest(cells[, cellsToUse],
condition = "Status",
feature = "clusters")
testProp
## W pval adjPval cluster
## cluster_2 190 0.024 0.48 cluster_2
## cluster_13 390 0.130 0.58 cluster_13
## cluster_6 230 0.150 0.58 cluster_6
## cluster_17 380 0.170 0.58 cluster_17
## cluster_14 380 0.180 0.58 cluster_14
## cluster_1 240 0.200 0.58 cluster_1
## cluster_15 370 0.240 0.58 cluster_15
## cluster_11 370 0.250 0.58 cluster_11
## cluster_8 240 0.260 0.58 cluster_8
## cluster_12 360 0.340 0.68 cluster_12
## cluster_9 360 0.380 0.69 cluster_9
## cluster_5 350 0.440 0.73 cluster_5
## cluster_16 280 0.590 0.83 cluster_16
## cluster_18 280 0.600 0.83 cluster_18
## cluster_10 280 0.620 0.83 cluster_10
## cluster_20 290 0.790 0.92 cluster_20
## cluster_19 320 0.810 0.92 cluster_19
## cluster_3 320 0.830 0.92 cluster_3
## cluster_7 300 0.910 0.96 cluster_7
## cluster_4 300 0.960 0.96 cluster_4
imagesToUse <- rownames(clinical)[clinical[, "Status"]%in%c("nonprogressor", "progressor")]
prop <- getProp(cells, feature = "clusters")
clusterToUse <- rownames(testProp)[1]
boxplot( prop[imagesToUse, clusterToUse] ~ clinical[imagesToUse, "Status"] )
set.seed(51773)
cells <- scater::runUMAP(cells,
subset_row = useMarkers,
exprs_values = "norm")
someImages <- unique(colData(cells)$imageID)[c(1,10,20,40,50,60)]
# UMAP by imageID
plotReducedDim(cells[,colData(cells)$imageID %in% someImages], dimred="UMAP", colour_by="imageID")
# UMAP by cell type cluster
plotReducedDim(cells[,colData(cells)$imageID %in% someImages], dimred="UMAP", colour_by="clusters")
spicyTest <- spicy(cells[, cellsToUse],
condition = "Status",
cellType = "clusters",
imageID = "imageID",
spatialCoords = c("m.cx", "m.cy"),
Rs = c(20, 50, 100),
sigma = 50,
BPPARAM = BPPARAM)
topPairs(spicyTest, n = 10)
## intercept coefficient p.value adj.pvalue from
## cluster_18__cluster_17 57.47863 103.82184 0.004269211 0.6412617 cluster_18
## cluster_17__cluster_18 56.23307 92.78098 0.006867987 0.6412617 cluster_17
## cluster_20__cluster_8 -70.27764 60.25065 0.009107886 0.6412617 cluster_20
## cluster_16__cluster_15 -141.08275 56.94516 0.009162453 0.6412617 cluster_16
## cluster_17__cluster_7 21.28862 107.32206 0.012743804 0.6412617 cluster_17
## cluster_8__cluster_20 -60.10460 60.04871 0.013838365 0.6412617 cluster_8
## cluster_15__cluster_16 -142.88187 48.39057 0.014461077 0.6412617 cluster_15
## cluster_6__cluster_6 192.73531 193.59265 0.015132520 0.6412617 cluster_6
## cluster_7__cluster_17 28.22911 107.75547 0.015205549 0.6412617 cluster_7
## cluster_9__cluster_5 -66.44860 75.52276 0.021968425 0.6412617 cluster_9
## to
## cluster_18__cluster_17 cluster_17
## cluster_17__cluster_18 cluster_18
## cluster_20__cluster_8 cluster_8
## cluster_16__cluster_15 cluster_15
## cluster_17__cluster_7 cluster_7
## cluster_8__cluster_20 cluster_20
## cluster_15__cluster_16 cluster_16
## cluster_6__cluster_6 cluster_6
## cluster_7__cluster_17 cluster_17
## cluster_9__cluster_5 cluster_5
signifPlot(spicyTest,
breaks = c(-1.5, 3, 0.5))
set.seed(51773)
cells <- lisaClust(cells,
k = 7,
Rs = c(20, 50, 100),
sigma = 50,
spatialCoords = c("m.cx", "m.cy"),
cellType = "clusters",
BPPARAM = BPPARAM)
df <- colData(cells) |>
as.data.frame() |>
filter(imageID == "Point2206_pt1116_31620")
ggplot(df, aes(x = m.cx, y = m.cy, colour = region)) +
geom_point()
# ggplot(df, aes(x = m.cx, y = m.cy, colour = clusters, region = region)) +
# geom_point() +
# geom_hatching(window = "square", line.spacing = 41) +
# scale_region_manual(values = c(region_1 = 1,
# region_9 = 1,
# region_10 = 2,
# region_7 = 2,
# region_2 = 3,
# region_3 = 4,
# region_4 = 4,
# region_5 = 5,
# region_8 = 6
# ))
hatchingPlot(cells,
useImages = "Point2206_pt1116_31620",
cellType = "clusters",
spatialCoords = c("m.cx", "m.cy"),
window = "square",
nbp = 300,
line.spacing = 41
)
testRegion <- colTest(cells[,cellsToUse],
feature = "region",
condition = "Status")
testRegion
## W pval adjPval cluster
## region_4 460 0.0067 0.047 region_4
## region_2 180 0.0190 0.066 region_2
## region_5 230 0.1400 0.330 region_5
## region_3 350 0.4900 0.860 region_3
## region_1 290 0.7400 0.910 region_1
## region_6 290 0.7800 0.910 region_6
## region_7 300 0.9600 0.960 region_7
regionMap(cells, cellType = "clusters", limit = c(0.2, 5))
data <- scFeatures(cells,
feature_types = c("proportion_raw", "gene_mean_celltype"),
sample = "imageID",
celltype = "clusters",
assay = "norm",
ncores = nCores )
## [1] "generating proportion raw features"
## [1] "generating gene mean celltype features"
names(data) <- c("prop", "mean")
imagesToUse <- rownames(clinical)[clinical[, "Status"]%in%c("nonprogressor", "progressor")]
test <- colTest(data$mean[imagesToUse,],
condition = clinical[imagesToUse, "Status"])
test |> head()
## W pval adjPval cluster
## cluster_19--PDL1 210 0.0016 0.69 cluster_19--PDL1
## cluster_3--PD1 200 0.0043 0.69 cluster_3--PD1
## cluster_6--PDL1 170 0.0053 0.69 cluster_6--PDL1
## cluster_5--P 460 0.0055 0.69 cluster_5--P
## cluster_19--P 460 0.0072 0.69 cluster_19--P
## cluster_17--P 460 0.0073 0.69 cluster_17--P
data[["regions"]] <- getProp(cells, "region")
data <- lapply(data, as.data.frame)
#Subset data
measurements <- lapply(data, function(x)x[imagesToUse, ])
set.seed(51773)
cv <- crossValidate(measurements = measurements,
outcome = clinical[imagesToUse, "Status"],
classifier = "elasticNetGLM",
nFolds = 5,
nRepeats = 100,
nCores = nCores
)
ROCplot(cv, comparison = "Assay Name")
performancePlot(cv,
performanceName = "AUC",
characteristicsList = list(x = "Assay Name"))